HTML.XIL is the default indexsheet for HTML content. The HTML.XIL
indexsheet includes definitions for common fields, handling of hit-anchors and
hit-highlighting, indexing of title tags, creation of a table of contents (TOC)
structure, and handling of break words.
<?xml version='1.0'?>
<!-- Default indexsheet for HTML -->
<!-- Root element: binds the xsl and np (indexsheet) namespace prefixes and
     lists np in extension-element-prefixes. -->
<!-- NOTE(review): case-sensitive="no" presumably makes name matching
     case-insensitive; confirm against the indexsheet reference. -->
<xsl:stylesheet case-sensitive="no"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:np="http://www.rocketsoftware.com/ns/indexsheet/2.0"
    extension-element-prefixes="np">
<!-- Common field definitions. Every dc: field below is a text field declared
     with the same settings: term-list="yes", proximity="no" and
     relevance="highest". -->
<np:definitions>
  <field name="dc:title"       type="text" term-list="yes" proximity="no" relevance="highest"/>
  <field name="dc:creator"     type="text" term-list="yes" proximity="no" relevance="highest"/>
  <field name="dc:subject"     type="text" term-list="yes" proximity="no" relevance="highest"/>
  <field name="dc:description" type="text" term-list="yes" proximity="no" relevance="highest"/>
</np:definitions>
<!-- META name="description": the content attribute goes to the
     dc:description field. -->
<xsl:template match='META[@name="description"]'>
  <np:index-attribute field="dc:description" name="content"/>
</xsl:template>
<!-- META name="author": the content attribute goes to the dc:creator field. -->
<xsl:template match='META[@name="author"]'>
  <np:index-attribute field="dc:creator" name="content"/>
</xsl:template>
<!-- META name="keywords": the content attribute goes to the dc:subject field. -->
<xsl:template match='META[@name="keywords"]'>
  <np:index-attribute field="dc:subject" name="content"/>
</xsl:template>
<!-- Links: hit-anchors are not allowed inside an HTML "A" element that is a
     link, so content of an A element with an HREF is processed with
     hit-anchor="postpone". -->
<!-- The rule is important enough that it is enforced internally for HTML
     even when the indexsheet does not specify it. -->
<xsl:template match="A[attribute(HREF)]">
  <np:index hit-anchor="postpone">
    <xsl:apply-templates/>
  </np:index>
</xsl:template>
<!-- HEAD: a hit can land inside the HTML "HEAD" element when TITLE or other
     head text is indexed, but neither hit-anchor nor hit-hilite is allowed
     there, so both are switched off. -->
<xsl:template match="HEAD">
  <np:index hit-hilite="no" hit-anchor="no">
    <xsl:apply-templates/>
  </np:index>
</xsl:template>
<!-- TITLE: excluded from the index here. Indexing the title is best avoided
     when it is identical for all documents or just repeats the first
     heading. -->
<!-- The HTML "TITLE" element may be indexed instead, as long as a rule still
     disallows both hit-anchor and hit-hilite for it. -->
<xsl:template match="TITLE">
  <np:index index="no">
    <xsl:apply-templates/>
  </np:index>
</xsl:template>
<!-- SCRIPT: content is neither indexed nor hit-highlighted. -->
<xsl:template match="SCRIPT">
  <np:index hit-hilite="no" index="no">
    <xsl:apply-templates/>
  </np:index>
</xsl:template>
<!-- STYLE: content is neither indexed nor hit-highlighted. -->
<xsl:template match="STYLE">
  <np:index hit-hilite="no" index="no">
    <xsl:apply-templates/>
  </np:index>
</xsl:template>
<!-- Headings H1 to H6 generate the sub-document table of contents (TOC)
     hierarchy. -->
<!-- The first heading found is also used as the document title
     (title-field="dc:title"). -->
<xsl:template match="H1|H2|H3|H4|H5|H6">
  <np:index title-field="dc:title" toc-heading="title-HTML">
    <xsl:apply-templates/>
  </np:index>
</xsl:template>
<!-- P: proximity="paragraph" marks each paragraph for paragraph proximity
     searching and for automatic abstract generation. -->
<!-- break-word="yes" covers P elements written without surrounding
     whitespace, so that neighbouring words are not stuck together. -->
<xsl:template match="P">
  <np:index break-word="yes" proximity="paragraph">
    <xsl:apply-templates/>
  </np:index>
</xsl:template>
<!-- BODY: hit-total="yes" places a hit-total replace tag at the end of the
     BODY element. The form generated by that tag is required for next/prev
     hit functionality. -->
<xsl:template match="BODY">
  <np:index break-word="yes" hit-total="yes">
    <xsl:apply-templates/>
  </np:index>
</xsl:template>
<!-- Word breaks: when the elements listed below are used without surrounding
     whitespace, break-word="yes" prevents the words on either side from
     being stuck together. -->
<!-- These word-break rules are now included by default, but they can be
     edited or removed from the indexsheet for HTML. -->
<!-- Continuation lines of the match pattern are kept flush left so that no
     indentation whitespace ends up inside the attribute value. -->
<xsl:template match="ADDRESS|BR|BLOCKQUOTE|BUTTON
|CENTER|DD|DT|DIV|FORM|FRAME|HR|IFRAME|IMG|INPUT
|ISINDEX|LI|NOFRAMES|NOSCRIPT|NOEMBED|OBJECT
|OPTION|PRE|PLAINTEXT|SPACER|TR|TD|TH|TABLE
|TEXTAREA|WBR">
  <np:index break-word="yes">
    <xsl:apply-templates/>
  </np:index>
</xsl:template>
</xsl:stylesheet>
Copyright © 2006-2023, Rocket Software, Inc. All rights reserved.